Analyse der Matoma-HaNS-Daten

1 Setup

1.1 R-Pakete starten

Show the code
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)
Show the code
theme_set(theme_minimal())

1.2 Roh-Daten laden und inspizieren (data_all_fct)

JSON-Daten wurden nicht importiert, da offenbar nur redundante Daten enthalten sind.

Show the code
tar_load(data_all_fct)

1.2.1 Dimension

Der Roh-Datensatz verfügt über

  • 2483 Zeilen
  • 7456 Spalten (Dubletten und Spalten mit Bildern bereits entfernt)

Jede Zeile entspricht einem “Visit”.

1.2.2 Erster Blick

Show the code
# Keep only the top-left 100 x 100 corner of the raw data (first 100 columns,
# first 100 rows) so the overview plot and listings below stay readable.
data_all_fct_head100 <- data_all_fct |>
  select(1:100) |>
  slice_head(n = 100)
Show the code
# Plot column types and missing cells of the 100 x 100 excerpt.
visdat::vis_dat(data_all_fct_head100)

1.2.3 Namen (1-100)

Show the code
# List the names of the first 100 columns.
names(data_all_fct_head100)
  [1] "idvisit"                                 
  [2] "visitorid"                               
  [3] "fingerprint"                             
  [4] "actiondetails_0_type"                    
  [5] "actiondetails_0_url"                     
  [6] "actiondetails_0_pageidaction"            
  [7] "actiondetails_0_idpageview"              
  [8] "actiondetails_0_servertimepretty"        
  [9] "actiondetails_0_pageid"                  
 [10] "actiondetails_0_eventcategory"           
 [11] "actiondetails_0_eventaction"             
 [12] "actiondetails_0_pageviewposition"        
 [13] "actiondetails_0_timestamp"               
 [14] "actiondetails_0_title"                   
 [15] "actiondetails_0_subtitle"                
 [16] "actiondetails_1_type"                    
 [17] "actiondetails_1_url"                     
 [18] "actiondetails_1_pagetitle"               
 [19] "actiondetails_1_pageidaction"            
 [20] "actiondetails_1_idpageview"              
 [21] "actiondetails_1_servertimepretty"        
 [22] "actiondetails_1_pageid"                  
 [23] "actiondetails_1_pageloadtime"            
 [24] "actiondetails_1_timespent"               
 [25] "actiondetails_1_timespentpretty"         
 [26] "actiondetails_1_pageloadtimemilliseconds"
 [27] "actiondetails_1_pageviewposition"        
 [28] "actiondetails_1_title"                   
 [29] "actiondetails_1_subtitle"                
 [30] "actiondetails_1_timestamp"               
 [31] "actiondetails_2_type"                    
 [32] "actiondetails_2_url"                     
 [33] "actiondetails_2_pageidaction"            
 [34] "actiondetails_2_idpageview"              
 [35] "actiondetails_2_servertimepretty"        
 [36] "actiondetails_2_pageid"                  
 [37] "actiondetails_2_eventcategory"           
 [38] "actiondetails_2_eventaction"             
 [39] "actiondetails_2_pageviewposition"        
 [40] "actiondetails_2_timestamp"               
 [41] "actiondetails_2_title"                   
 [42] "actiondetails_2_subtitle"                
 [43] "actiondetails_3_type"                    
 [44] "actiondetails_3_url"                     
 [45] "actiondetails_3_pagetitle"               
 [46] "actiondetails_3_pageidaction"            
 [47] "actiondetails_3_idpageview"              
 [48] "actiondetails_3_servertimepretty"        
 [49] "actiondetails_3_pageid"                  
 [50] "actiondetails_3_timespent"               
 [51] "actiondetails_3_timespentpretty"         
 [52] "actiondetails_3_pageviewposition"        
 [53] "actiondetails_3_title"                   
 [54] "actiondetails_3_subtitle"                
 [55] "actiondetails_3_timestamp"               
 [56] "actiondetails_4_type"                    
 [57] "actiondetails_4_url"                     
 [58] "actiondetails_4_pageidaction"            
 [59] "actiondetails_4_idpageview"              
 [60] "actiondetails_4_servertimepretty"        
 [61] "actiondetails_4_pageid"                  
 [62] "actiondetails_4_sitesearchkeyword"       
 [63] "actiondetails_4_sitesearchcount"         
 [64] "actiondetails_4_pageviewposition"        
 [65] "actiondetails_4_title"                   
 [66] "actiondetails_4_subtitle"                
 [67] "actiondetails_4_timestamp"               
 [68] "actiondetails_5_type"                    
 [69] "actiondetails_5_url"                     
 [70] "actiondetails_5_pageidaction"            
 [71] "actiondetails_5_idpageview"              
 [72] "actiondetails_5_servertimepretty"        
 [73] "actiondetails_5_pageid"                  
 [74] "actiondetails_5_eventcategory"           
 [75] "actiondetails_5_eventaction"             
 [76] "actiondetails_5_pageviewposition"        
 [77] "actiondetails_5_timestamp"               
 [78] "actiondetails_5_title"                   
 [79] "actiondetails_5_subtitle"                
 [80] "actiondetails_6_type"                    
 [81] "actiondetails_6_url"                     
 [82] "actiondetails_6_pagetitle"               
 [83] "actiondetails_6_pageidaction"            
 [84] "actiondetails_6_idpageview"              
 [85] "actiondetails_6_servertimepretty"        
 [86] "actiondetails_6_pageid"                  
 [87] "actiondetails_6_timespent"               
 [88] "actiondetails_6_timespentpretty"         
 [89] "actiondetails_6_pageviewposition"        
 [90] "actiondetails_6_title"                   
 [91] "actiondetails_6_subtitle"                
 [92] "actiondetails_6_timestamp"               
 [93] "actiondetails_7_type"                    
 [94] "actiondetails_7_url"                     
 [95] "actiondetails_7_pageidaction"            
 [96] "actiondetails_7_idpageview"              
 [97] "actiondetails_7_servertimepretty"        
 [98] "actiondetails_7_pageid"                  
 [99] "actiondetails_7_eventcategory"           
[100] "actiondetails_7_eventaction"             

1.2.4 Werte der ersten 100 Spalten

Show the code
# Print type and leading values of every column in the excerpt.
glimpse(data_all_fct_head100)
Rows: 100
Columns: 100
$ idvisit                                  <fct> 19, 20, 18, 16, 17, 15, 14, 1…
$ visitorid                                <fct> 01357ce636fa78c2, 01357ce636f…
$ fingerprint                              <fct> 9ffcf86ca880ddaa, 9ffcf86ca88…
$ actiondetails_0_type                     <fct> event, action, action, action…
$ actiondetails_0_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_0_pageidaction             <fct> 17, 32, 2, 32, 17, 32, 192, 3…
$ actiondetails_0_idpageview               <fct> 1YNiVr, pMCs2U, DNNr9n, c6Mr7…
$ actiondetails_0_servertimepretty         <fct> "Mar 4, 2024 22:58:30", "Mar …
$ actiondetails_0_pageid                   <fct> 6509, 6510, 5632, 5620, 5621,…
$ actiondetails_0_eventcategory            <fct> click_button, , , , click_but…
$ actiondetails_0_eventaction              <fct> Kanäle, , , , Kanäle, , , , ,…
$ actiondetails_0_pageviewposition         <fct> NA, 1, 1, 1, NA, 1, 1, 1, 1, …
$ actiondetails_0_timestamp                <fct> 2024-03-04 22:58:30, 2024-03-…
$ actiondetails_0_title                    <fct> Event, HAnS, HAnS, HAnS, Even…
$ actiondetails_0_subtitle                 <fct> "Category: \"\"click_button',…
$ actiondetails_1_type                     <fct> action, , event, action, , ev…
$ actiondetails_1_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_1_pagetitle                <fct> HAnS, , , HAnS, , , , , , , ,…
$ actiondetails_1_pageidaction             <fct> 32, NA, 3, 36, NA, 191, NA, 3…
$ actiondetails_1_idpageview               <fct> HKiQ62, , DNNr9n, sXx3s2, , D…
$ actiondetails_1_servertimepretty         <fct> "Mar 4, 2024 23:16:59", "", "…
$ actiondetails_1_pageid                   <fct> 6511, NA, 5633, 5622, NA, 528…
$ actiondetails_1_pageloadtime             <fct> 0.94s, , , , , , , , , , , , …
$ actiondetails_1_timespent                <fct> 21, NA, NA, 200, NA, NA, NA, …
$ actiondetails_1_timespentpretty          <fct> 21s, , , 3 min 20s, , , , , ,…
$ actiondetails_1_pageloadtimemilliseconds <fct> 940, NA, NA, NA, NA, NA, NA, …
$ actiondetails_1_pageviewposition         <fct> 1, NA, 1, 2, NA, 1, NA, 1, 2,…
$ actiondetails_1_title                    <fct> HAnS, , Event, HAnS, , Event,…
$ actiondetails_1_subtitle                 <fct> "https://hans.th-nuernberg.de…
$ actiondetails_1_timestamp                <fct> 2024-03-04 23:16:59, NA, 2024…
$ actiondetails_2_type                     <fct> event, , action, event, , eve…
$ actiondetails_2_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_2_pageidaction             <fct> 33, NA, 147, 33, NA, 33, NA, …
$ actiondetails_2_idpageview               <fct> HKiQ62, , 1wrpnl, c6Mr7k, , z…
$ actiondetails_2_servertimepretty         <fct> "Mar 4, 2024 23:17:20", "", "…
$ actiondetails_2_pageid                   <fct> 6512, NA, 5634, 5623, NA, 528…
$ actiondetails_2_eventcategory            <fct> click_channelcard, , , click_…
$ actiondetails_2_eventaction              <fct> "ZELLKU", "", "", "ETECHEN", …
$ actiondetails_2_pageviewposition         <fct> 1, NA, 2, 1, NA, 1, NA, 2, 2,…
$ actiondetails_2_timestamp                <fct> 2024-03-04 23:17:20, NA, 2024…
$ actiondetails_2_title                    <fct> Event, , HAnS, Event, , Event…
$ actiondetails_2_subtitle                 <fct> "Category: \"\"click_channelc…
$ actiondetails_3_type                     <fct> action, , search, search, , a…
$ actiondetails_3_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_3_pagetitle                <fct> HAnS, , , , , HAnS, , HAnS, ,…
$ actiondetails_3_pageidaction             <fct> 36, NA, NA, NA, NA, 32, NA, 3…
$ actiondetails_3_idpageview               <fct> E8FkLA, , 1wrpnl, sXx3s2, , U…
$ actiondetails_3_servertimepretty         <fct> "Mar 4, 2024 23:17:20", "", "…
$ actiondetails_3_pageid                   <fct> 6513, NA, 5635, 5624, NA, 528…
$ actiondetails_3_timespent                <fct> 10, NA, NA, NA, NA, 3, NA, 45…
$ actiondetails_3_timespentpretty          <fct> 10s, , , , , 3s, , 45s, , , 1…
$ actiondetails_3_pageviewposition         <fct> 2, NA, 2, 2, NA, 2, NA, 2, NA…
$ actiondetails_3_title                    <fct> HAnS, , Site Search, Site Sea…
$ actiondetails_3_subtitle                 <fct> "https://hans.th-nuernberg.de…
$ actiondetails_3_timestamp                <fct> 2024-03-04 23:17:20, NA, 2024…
$ actiondetails_4_type                     <fct> search, , event, event, , eve…
$ actiondetails_4_url                      <fct> , , https://hans.th-nuernberg…
$ actiondetails_4_pageidaction             <fct> NA, NA, 246, 38, NA, 33, NA, …
$ actiondetails_4_idpageview               <fct> E8FkLA, , 1wrpnl, sXx3s2, , U…
$ actiondetails_4_servertimepretty         <fct> "Mar 4, 2024 23:17:20", "", "…
$ actiondetails_4_pageid                   <fct> 6514, NA, 5637, 5625, NA, 528…
$ actiondetails_4_sitesearchkeyword        <fct> "ZELLKU", "", "", "", "", "",…
$ actiondetails_4_sitesearchcount          <fct> 0, NA, NA, NA, NA, NA, NA, NA…
$ actiondetails_4_pageviewposition         <fct> 2, NA, 3, 3, NA, 2, NA, 3, NA…
$ actiondetails_4_title                    <fct> Site Search, , Event, Event, …
$ actiondetails_4_subtitle                 <fct> "ZELLKU", "", "Category: \"\"…
$ actiondetails_4_timestamp                <fct> 2024-03-04 23:17:20, NA, 2024…
$ actiondetails_5_type                     <fct> event, , action, action, , ac…
$ actiondetails_5_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_5_pageidaction             <fct> 38, NA, 95, 244, NA, 36, NA, …
$ actiondetails_5_idpageview               <fct> E8FkLA, , wZEbOS, K7yIXJ, , T…
$ actiondetails_5_servertimepretty         <fct> "Mar 4, 2024 23:17:30", "", "…
$ actiondetails_5_pageid                   <fct> 6515, NA, 5638, 5626, NA, 529…
$ actiondetails_5_eventcategory            <fct> click_videocard, , , , , , , …
$ actiondetails_5_eventaction              <fct> "2-ELISA", "", "", "", "", ""…
$ actiondetails_5_pageviewposition         <fct> 3, NA, 4, 4, NA, 3, NA, 4, NA…
$ actiondetails_5_timestamp                <fct> 2024-03-04 23:17:30, NA, 2024…
$ actiondetails_5_title                    <fct> Event, , HAnS, HAnS, , HAnS, …
$ actiondetails_5_subtitle                 <fct> "Category: \"\"click_videocar…
$ actiondetails_6_type                     <fct> action, , event, event, , sea…
$ actiondetails_6_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_6_pagetitle                <fct> HAnS, , , , , , , , , , HAnS,…
$ actiondetails_6_pageidaction             <fct> 274, NA, 247, 245, NA, NA, NA…
$ actiondetails_6_idpageview               <fct> DXTmIN, , wZEbOS, K7yIXJ, , T…
$ actiondetails_6_servertimepretty         <fct> "Mar 4, 2024 23:17:30", "", "…
$ actiondetails_6_pageid                   <fct> 6516, NA, 5639, 5627, NA, 529…
$ actiondetails_6_timespent                <fct> 14, NA, NA, NA, NA, NA, NA, N…
$ actiondetails_6_timespentpretty          <fct> 14s, , , , , , , , , , 30s, ,…
$ actiondetails_6_pageviewposition         <fct> 4, NA, 4, 4, NA, 3, NA, 4, NA…
$ actiondetails_6_title                    <fct> HAnS, , Event, Event, , Site …
$ actiondetails_6_subtitle                 <fct> "https://hans.th-nuernberg.de…
$ actiondetails_6_timestamp                <fct> 2024-03-04 23:17:30, NA, 2024…
$ actiondetails_7_type                     <fct> event, , action, event, , eve…
$ actiondetails_7_url                      <fct> https://hans.th-nuernberg.de/…
$ actiondetails_7_pageidaction             <fct> 275, NA, 147, 245, NA, 38, NA…
$ actiondetails_7_idpageview               <fct> DXTmIN, , WXg3TG, K7yIXJ, , T…
$ actiondetails_7_servertimepretty         <fct> "Mar 4, 2024 23:17:31", "", "…
$ actiondetails_7_pageid                   <fct> 6517, NA, 5640, 5628, NA, 529…
$ actiondetails_7_eventcategory            <fct> videoplayer_click, , , genera…
$ actiondetails_7_eventaction              <fct> "play", "", "", "3", "", "Vor…

1.2.5 Datensatz data_slim, Zeilen 1-100

Show the code
tar_load(data_slim)

# Show the first 100 rows of `data_slim`, as announced in the section heading.
# The earlier `filter(idvisit == 1)` matched no rows (the rendered output was
# an empty table), so the preview showed nothing.
# NOTE(review): if a single visit was meant to be shown, filter on an
# `idvisit` value that actually occurs in the data.
data_slim |>
  head(100)
[1] nr      type    value   idvisit
<0 rows> (or 0-length row.names)

1.3 Datensatz nur User

Entfernt man Developer, Admins und Lecturers aus dem Roh-Datensatz, so bleiben weniger Zeilen übrig:

Show the code
tar_load(data_users_only)
  • 1932 Zeilen
  • 7456 Spalten

1.4 Datensatz mit Anzahl der Aktionen pro User

Show the code
tar_load(count_action)

1.5 Zeitraum

1.5.1 Beginn/Ende der Daten

Show the code
tar_load(config)

Laut config.yaml ist das aktuelle Semester 24-ss.

Show the code
tar_load(time_minmax)
Show the code
# Collapse the rows of `time_minmax` to the overall earliest and latest
# timestamp and render them as a table.
gt(
  summarise(
    time_minmax,
    time_min = min(time_min),
    time_max = max(time_max)
  )
)
time_min time_max
2024-03-04 09:40:13 2024-06-06 11:06:14

Diese Statistik wurde auf Basis des Datenobjekts data_slim berechnet.

1.5.2 Days since last visit

Show the code
tar_load(time_since_last_visit)

# Convert to numeric via character. The raw data stores its columns as
# factors, and as.numeric() on a factor returns the internal level codes
# (1, 2, 3, ...) instead of the recorded values. Going through as.character()
# is correct for factors and harmless for character or numeric input.
# NOTE(review): the previously rendered minimum of 1 suggests level codes were
# being summarised - confirm the column type of `dayssincelastvisit`.
time_since_last_visit <-
  time_since_last_visit |>
  mutate(dayssincelastvisit = as.numeric(as.character(dayssincelastvisit)))

# Descriptive statistics of the days since the previous visit.
time_since_last_visit |>
  datawizard::describe_distribution(dayssincelastvisit) |>
  knitr::kable()
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
dayssincelastvisit 3.95333 8.365608 0 1 42 3.0326 8.31164 2357 126
Show the code
# Density curve of the days since the previous visit.
ggplot(time_since_last_visit, aes(x = dayssincelastvisit)) +
  geom_density()

1.6 Statistiken

1.6.1 Mit den 499er-Daten

Show the code
# Descriptive statistics of `n_max`, rendered with two decimals.
describe_distribution(count_action, n_max) |>
  gt() |>
  fmt_number(
    columns = where(is.numeric),
    decimals = 2
  )
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
n_max 408.91 135.04 212.00 2.00 499.00 −1.12 −0.19 1,602.00 0.00

1.6.2 Ohne die 499er-Daten

Show the code
# Same statistics after dropping the rows whose `n_max` equals 499.
count_action2 <- filter(count_action, n_max != 499)

describe_distribution(count_action2, n_max) |>
  gt() |>
  fmt_number(
    columns = where(is.numeric),
    decimals = 2
  )
Variable Mean SD IQR Min Max Skewness Kurtosis n n_Missing
n_max 243.11 96.73 84.00 2.00 467.00 0.05 −0.01 564.00 0.00

1.7 Verteilung

1.7.1 Mit den 499er-Daten

Show the code
# Mean and SD of `n_max`; both are drawn into the histogram below.
count_action_avg <- mean(count_action$n_max)
count_action_sd <- sd(count_action$n_max)

# Histogram of `n_max` with the mean as a vertical line and mean +/- 1 SD as
# a thick horizontal segment on the baseline.
ggplot(count_action) +
  geom_histogram(aes(x = n_max)) +
  geom_vline(
    xintercept = count_action_avg,
    color = palette_okabe_ito()[1]
  ) +
  geom_segment(
    x = count_action_avg - count_action_sd,
    xend = count_action_avg + count_action_sd,
    y = 0,
    yend = 0,
    color = palette_okabe_ito()[2],
    size = 2
  ) +
  annotate("label", x = count_action_avg, y = 1500, label = "MW") +
  annotate("label", x = count_action_avg + count_action_sd, y = 0, label = "SD") +
  labs(
    x = "Anzahl von Aktionen pro Visit",
    y = "n",
    caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD"
  ) +
  theme_minimal()

Show the code
  #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")
  • Mittelwert der Aktionen pro Visit: 408.91.
  • SD der Aktionen pro Visit: 135.04.

1.7.2 Ohne 499er-Daten

Show the code
# Mean and SD of `n_max` in the reduced data (rows with n_max == 499 removed).
count_action_avg2 <- mean(count_action2$n_max)
count_action_sd2 <- sd(count_action2$n_max)

# Histogram of `n_max` with the mean as a vertical line and mean +/- 1 SD as
# a thick horizontal segment on the baseline.
count_action2 |>
  ggplot() +
  geom_histogram(aes(x = n_max)) +
  labs(x = "Anzahl von Aktionen pro Visit",
       y = "n",
       title = "Verteilung der User-Aktionen pro Visit",
       caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
  theme_minimal() +
  geom_vline(xintercept = count_action_avg2,
             color = palette_okabe_ito()[1]) +
  # Both ends of the segment must use the mean of the reduced data; the left
  # end previously used `count_action_avg` (the mean including the 499 rows),
  # which shifted the SD bar.
  geom_segment(x = count_action_avg2 - count_action_sd2,
               y = 0,
               xend = count_action_avg2 + count_action_sd2,
               yend = 0,
               color = palette_okabe_ito()[2],
               size = 2) +
  annotate("label", x = count_action_avg2, y = 1500, label = "MW", vjust = "top") +
  annotate("label", x = count_action_avg2 + count_action_sd2, y = 0, label = "SD", vjust = "bottom")

Show the code
  #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")
  • Mittelwert der Aktionen pro Visit: 243.11.
  • SD der Aktionen pro Visit: 96.73.

2 Zeit pro Visit

Die Visit-Zeit wurde auf 600 Min. trunkiert/begrenzt.

Show the code
tar_load(time_spent)
tar_load(time_duration)

# Express `time_diff` in minutes as `t_min` and keep only rows below 600.
time_spent <- mutate(
  time_spent,
  t_min = as.numeric(time_diff, units = "mins")
)
time_spent <- filter(time_spent, t_min < 600)

2.1 Verweildauer-Statistiken in Sekunden

Show the code
# Dwell-time statistics across visits.
# Step 1 reduces each group of `time_spent` to a single value; step 2
# describes the distribution of those values. The previous version computed
# mean/sd/min/max inside each group and then averaged them, which is why the
# rendered table showed SD = 0 and min = max = mean.
# NOTE(review): assumes `time_spent` arrives grouped by visit - confirm.
# NOTE(review): the heading announces seconds, but this table and the minutes
# table rendered the same value (62.87) - confirm the unit of `time_diff`.
time_spent |>
  summarise(visit_time = mean(as.numeric(time_diff))) |>
  summarise(
    mean_time_diff_avg = mean(visit_time),
    sd_time_diff_avg = sd(visit_time),
    min_time_diff_avg = min(visit_time),
    max_time_diff_avg = max(visit_time)
  ) |>
  gt() |>
  fmt_number(columns = everything(),
             decimals = 2)
mean_time_diff_avg sd_time_diff_avg min_time_diff_avg max_time_diff_avg
62.87 0.00 62.87 62.87
Show the code
tar_load(time_duration)

# Mean of `visitduration_sec` (missing values ignored), plus the same value
# divided by 60.
summarise(
  time_duration,
  duration_sec_avg = mean(visitduration_sec, na.rm = TRUE),
  duration_min_avg = duration_sec_avg / 60
)
  duration_sec_avg duration_min_avg
1          951.224         15.85373

2.2 Verweildauer-Statistiken in Minuten

Show the code
# Dwell-time statistics in minutes across visits.
# Step 1 reduces each group of `time_spent` to a single value; step 2
# describes the distribution of those values. The previous version computed
# mean/sd/min/max inside each group and then averaged them, which is why the
# rendered table showed SD = 0 and min = max = mean.
# NOTE(review): assumes `time_spent` arrives grouped by visit - confirm.
time_spent |>
  summarise(visit_t_min = mean(t_min)) |>
  summarise(
    mean_t_min_avg = mean(visit_t_min),
    sd_t_min_avg = sd(visit_t_min),
    min_t_min_avg = min(visit_t_min),
    max_t_min_avg = max(visit_t_min)
  ) |>
  gt() |>
  fmt_number(columns = everything(),
             decimals = 2)
mean_t_min_avg sd_t_min_avg min_t_min_avg max_t_min_avg
62.87 0.00 62.87 62.87

2.3 Visualisierung der Verweildauer

2.3.1 bins=20

Show the code
# Histogram of the dwell time with 20 bins, as announced in the section
# heading (without `bins` the default of 30 is used).
# `t_min` is a plain number of minutes, so the x axis stays continuous:
# scale_x_time() would read the numbers as seconds and label them as clock
# times, contradicting the axis title.
time_spent |>
  ggplot(aes(x = t_min)) +
  geom_histogram(bins = 20) +
  theme_minimal() +
  labs(y = "n",
       x = "Verweildauer in HaNS pro Visit in Minuten")

2.3.2 bins=100

Show the code
# Histogram of `t_min` with a bin width of 5.
ggplot(time_spent, aes(x = t_min)) +
  geom_histogram(binwidth = 5) +
  labs(
    title = "Verweildauer in HaNS pro Visit",
    x = "Verweildauer in Minuten",
    y = "n",
    caption = "binwidth = 5 Min."
  ) +
  theme_minimal()

2.3.3 Zeitdauer begrenzt auf 1-120 Min.

Show the code
# Restrict to dwell times strictly between 1 and 120 minutes.
time_spent2 <-
  time_spent |>
  filter(t_min > 1, t_min < 120)

# Histogram of the restricted data with a bin width of 10 minutes.
time_spent2 |>
  ggplot(aes(x = t_min)) +
  geom_histogram(binwidth = 10) +
  theme_minimal() +
  labs(y = "n",
       x = "Verweildauer in HaNS pro Visit in Minuten",
       title = "Verweildauer begrenzt auf 1-120 Minuten",
       # Caption typo fixed ("bindwidth" -> "binwidth").
       caption = "binwidth = 10 Min.")

3 Was machen die User?

Show the code
tar_load(count_action_type)

3.1 Statistiken

Show the code
# Frequency of each action category, most frequent first.
gt(count(count_action_type, category, sort = TRUE))
category n
NA 740858
video 40800
visit_page 4353
click_slideChange 3364
Search Results Count 419
login 387
click_topic 342
in_media_search 173
Kanäle 86
Medien 78
click_channelcard 55
GESOA 44

3.2 Verteilung

3.2.1 Rohwerte

Show the code
# Horizontal bar chart of the number of rows per category, ordered by count.
count_action_type |>
  count(category, sort = TRUE) |>
  ggplot(aes(y = reorder(category, n), x = n)) +
  geom_col() +
  geom_bar_text() +
  labs(
    # The x axis carries the count `n`, so label it as a count; it was
    # previously labelled "User-Aktion", duplicating the y-axis meaning.
    x = "Anzahl der User-Aktionen",
    y = "Aktion",
    title = "Anzahl der User-Aktionen nach Kategorie"
  ) +
  theme_minimal() +
  scale_x_continuous(labels = scales::comma)

3.2.2 Log-Skalierung

Show the code
# Same bar chart as above, with the counts on a log10 x axis.
count_action_type |>
  count(category, sort = TRUE) |>
  ggplot(aes(y = reorder(category, n), x = n)) +
  geom_col() +
  geom_bar_text() +
  labs(
    # Axis label typo fixed ("Anazhl" -> "Anzahl").
    x = "Anzahl der User-Aktionen",
    y = "Aktion",
    title = "Anzahl der User-Aktionen nach Kategorie",
    caption = "Log10-Skala"
  ) +
  theme_minimal() +
  scale_x_log10()

4 An welchen Tagen und zu welcher Zeit kommen die User zu HaNS?

4.1 Setup

Show the code
tar_load(time_visit_wday)
Show the code
# Weekday labels in display order, Monday first.
# NOTE(review): indexing by `dow` assumes 1 = Monday ... 7 = Sunday - confirm
# against how `dow` is derived upstream.
days_of_week <- c(
  "Monday", "Tuesday", "Wednesday", "Thursday",
  "Friday", "Saturday", "Sunday"
)

# Add `dow2`: the weekday name as a factor whose levels follow the order above.
time_visit_wday[["dow2"]] <- factor(
  days_of_week[time_visit_wday$dow],
  levels = days_of_week
)

4.2 HaNS-Login nach Uhrzeit

Show the code
# Share of rows per hour of day, shown as a bar chart.
time_visit_wday |>
  as_tibble() |>
  count(hour) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  labs(
    x = "Uhrzeit",
    y = "Anteil",
    title = "HaNS-Nutzer sind keine Frühaufsteher"
  ) +
  theme_minimal()

Show the code
 # coord_polar()
Show the code
# Same hourly shares as above, drawn in polar coordinates.
time_visit_wday |>
  as_tibble() |>
  count(hour) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  coord_polar() +
  theme_minimal()

4.3 Verteilung der HaNS-Besuche nach Wochentagen

Show the code
# Share of rows per weekday (`dow2`), shown as a bar chart.
time_visit_wday |>
  as_tibble() |>
  count(dow2) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = dow2, y = prop)) +
  geom_col() +
  labs(
    x = "Wochentag",
    y = "Anteil",
    title = "Verteilung der HaNS-Logins nach Wochentagen"
  ) +
  theme_minimal()

Show the code
 # coord_polar()
Show the code
# Same weekday shares as above, drawn in polar coordinates.
time_visit_wday |>
  as_tibble() |>
  count(dow2) |>
  mutate(prop = n / sum(n)) |>
  ggplot(aes(x = dow2, y = prop)) +
  geom_col() +
  coord_polar() +
  labs(
    x = "Wochentag",
    y = "Anteil",
    title = "Verteilung der HaNS-Logins nach Wochentagen"
  ) +
  theme_minimal()

4.3.1 HaNS-Login nach Wochentagen und Uhrzeit

Show the code
# Hourly shares within each weekday: `prop` is computed per `dow2` group, so
# the bars of every facet sum to 1.
time_visit_wday |>
  as_tibble() |>
  count(dow2, hour) |>
  group_by(dow2) |>
  mutate(prop = n/sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  facet_wrap(~ dow2) +
  theme_minimal() +
  labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
       # The x axis shows `hour`; the weekday is the facet. It was previously
       # labelled "Wochentag".
       x = "Uhrzeit",
       y = "Anteil")

Show the code
 # coord_polar()
Show the code
# Hourly shares within each weekday in polar coordinates: `prop` is computed
# per `dow2` group, so the bars of every facet sum to 1.
time_visit_wday |>
  as_tibble() |>
  count(dow2, hour) |>
  group_by(dow2) |>
  mutate(prop = n/sum(n)) |>
  ggplot(aes(x = hour, y = prop)) +
  geom_col() +
  facet_wrap(~ dow2) +
  theme_minimal() +
  labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
       # The x axis shows `hour`; the weekday is the facet. It was previously
       # labelled "Wochentag".
       x = "Uhrzeit",
       y = "Anteil") +
  coord_polar()

4.4 Anzahl der Visits nach Datum (Tagen) und Uhrzeit (bin2d)

Show the code
# Add the calendar date of each row; `time2` is reused by the heatmaps below.
time2 <-
time_visit_wday |>
  ungroup() |>
  mutate(date = as.Date(date_time))

# Heatmap of counts per calendar day (x) and hour of day (y).
time2 |>
  ggplot(aes(x = date, y = hour)) +
  geom_bin2d(binwidth = c(1, 1)) + # (1 day, 1 hour)
  scale_x_date(date_breaks = "1 month") +
  theme(legend.position = "bottom") +
  scale_fill_viridis_c() +
  # The x bin width is 1 day here; the caption previously said "one week".
  labs(caption = "Each x-bin maps to one day")

4.5 Anzahl der Visits nach Datum (Wochen) und Uhrzeit (bin2d)

Show the code
# Heatmap of counts per 7-day bin (x) and hour of day (y); the x axis is
# labelled with the week number (`%W`).
ggplot(time2, aes(x = date, y = hour)) +
  geom_bin2d(binwidth = c(7, 1)) +
  scale_x_date(date_breaks = "1 week", date_labels = "%W") +
  scale_fill_viridis_c() +
  labs(
    x = "Week number in 2023/2024",
    caption = "Each x-bin maps to one week"
  ) +
  theme(legend.position = "bottom")

4.6 Anzahl der Visits nach Datum (Wochen) und Wochentag (bin2d)

Show the code
# Heatmap of counts per 7-day bin (x) and weekday number `dow` (y); bins are
# 1 week wide and 1 weekday high.
ggplot(time2, aes(x = date, y = dow)) +
  geom_bin2d(binwidth = c(7, 1)) +
  scale_x_date(date_breaks = "1 week", date_labels = "%W") +
  scale_y_continuous(breaks = 1:7) +
  scale_fill_viridis_c() +
  labs(
    x = "Week number in 2023/2024",
    y = "Day of Week",
    caption = "Each x-bin maps to one week"
  ) +
  theme(legend.position = "bottom")

5 KI-Gebrauch

5.1 Welcher Anteil der Nutzenden klickt auf ein Wort im Transkript?

Show the code
tar_load(data_slim)
Show the code
# Among the non-empty "subtitle" rows, count how many contain the string
# "click_transcript_word" and express both counts as proportions.
# NOTE(review): this is a share of rows, not of distinct users, although the
# section heading asks about users - confirm which one is intended.
data_slim |>
  filter(
    type == "subtitle",
    !is.na(value),
    value != ""
  ) |>
  count(click_transcript_word = str_detect(value, "click_transcript_word")) |>
  mutate(prop = n / sum(n)) |>
  gt()
click_transcript_word n prop
FALSE 158632 0.99881627
TRUE 188 0.00118373

5.2 … Aufteilung nach Monaten

Show the code
tar_load(ai_transcript_clicks_per_month)
Show the code
# Render the precomputed monthly transcript-click counts as a table.
gt(ai_transcript_clicks_per_month)
click_transcript_word n
2024-3
FALSE 27800
2024-4
FALSE 93760
2024-5
FALSE 29970
2024-6
FALSE 7290
NA-NA
FALSE 158632
TRUE 188